Libraries

Load data

# Locations of the two serialized objects: ".t" is the tumor data set and
# ".s" is the skin data set.
file_path_t <- "/Users/juliat/Desktop/bachelor/Final thesis/ccc_cellchat.t.2.rds"
file_path_s <- "/Users/juliat/Desktop/bachelor/Final thesis/ccc_cellchat.s.2.rds"

# Deserialize both objects; they are used by the sections below.
cellchat.t <- readRDS(file = file_path_t)
cellchat.s <- readRDS(file = file_path_s)

Creating communication data frames with subsetCommunication (to avoid turning all data into a matrix, we take only the communication tables) and merging the data

# Extract the communication table of each condition as a data frame.
# thresh = 1 is used so that no rows are lost: the tumor and skin tables have
# 13008 and 12058 rows and could not be compared directly otherwise.
cellchat.s.sC <- subsetCommunication(cellchat.s, thresh = 1)
cellchat.t.sC <- subsetCommunication(cellchat.t, thresh = 1)

# Outer join of the tumor table (x) with the skin table (y) on every
# descriptive column. The remaining shared columns (prob, pval) receive the
# ".x" (tumor) and ".y" (skin) suffixes.
merged_data <- merge(
  x = cellchat.t.sC,
  y = cellchat.s.sC,
  by = c(
    "source", "target", "ligand", "receptor", "interaction_name",
    "interaction_name_2", "pathway_name", "annotation", "evidence"
  ),
  all.x = TRUE,
  all.y = TRUE
)

Replace NA values with 0

# Copy the suffixed merge columns (.x = tumor, .y = skin) into explicitly named
# columns and fill the gaps left by the outer join:
# - an interaction that is absent from one condition has probability 0 there;
# - its p-value is set to 1 (no evidence). Filling the p-value with 0 would make
#   an absent interaction look maximally significant.
merged_data <- merged_data %>%
  mutate(
    prob.t = replace(prob.x, is.na(prob.x), 0),
    prob.s = replace(prob.y, is.na(prob.y), 0),
    pval.t = replace(pval.x, is.na(pval.x), 1),
    pval.s = replace(pval.y, is.na(pval.y), 1)
  )

Calculating the difference between probabilities tumor - skin

# Signed change in communication probability (tumor minus skin). Rows are then
# ordered so that the largest absolute changes come first.
merged_data <- arrange(
  mutate(merged_data, prob.delta = prob.t - prob.s),
  desc(abs(prob.delta))
)

Count interactions for skin and tumor (actual numeric difference)

# Number of communication rows per (source, target) pair in each condition.
# count() returns an ungrouped table, so no grouping leaks into the merge and
# no "grouped output" note is emitted.
cellchat.t.sC_counts <- dplyr::count(cellchat.t.sC, source, target, name = "count_t")
cellchat.s.sC_counts <- dplyr::count(cellchat.s.sC, source, target, name = "count_s")

# Outer join so that pairs present in only one condition are kept.
merged_df.interactions <- merge(
  cellchat.t.sC_counts,
  cellchat.s.sC_counts,
  by = c("source", "target"),
  all = TRUE
)

# A pair that is missing from one condition has zero interactions there.
# Only the two count columns are filled; the factor key columns are left alone.
merged_df.interactions$count_t[is.na(merged_df.interactions$count_t)] <- 0
merged_df.interactions$count_s[is.na(merged_df.interactions$count_s)] <- 0

# Difference in counts: tumor minus skin.
merged_df.interactions$interaction_difference <-
  merged_df.interactions$count_t - merged_df.interactions$count_s

Heatmap based on counts difference

# Tile map of the tumor-minus-skin interaction counts: targets on the x axis,
# sources on the y axis. The diverging scale is red where the difference is
# positive and blue where it is negative.
heatmap_plot <- ggplot(
  data = merged_df.interactions,
  mapping = aes(x = target, y = source, fill = interaction_difference)
) +
  geom_tile() +
  scale_fill_gradient2(
    name = "Interactions Diff",
    low = rgb(33, 102, 172, maxColorValue = 255),
    high = rgb(178, 24, 43, maxColorValue = 255)
  ) +
  labs(
    title = "Difference in Number of Interactions (Tumor - Skin)",
    x = "target (receptor)",
    y = "source (ligand)",
    fill = "Interactions Difference"
  ) +
  theme(axis.text.x = element_text(angle = 90))
print(heatmap_plot)

Bubble Plot

# Bubble plots of the tumor object (thresh = 0.05) for ten source -> target
# pairs. Cell groups are addressed by numeric index; the trailing comments give
# the group names noted by the original author.
bubble_pairs <- list(
  c(8, 8),   # Fibroblast_32 to Fibroblast_32
  c(8, 28),  # Fibroblast_32 to T naive CD4
  c(3, 28),  # Endothel_vasc._31 to T naive CD4
  c(9, 9),   # Fibroblast_36 to Fibroblast_36
  c(8, 2),   # Fibroblast_32 to Endothel
  c(3, 8),   # Endothel_vasc._31 to Fibroblast_32
  c(5, 28),  # Fibroblast_18 to T naive CD4
  c(8, 13),  # Fibroblast_32 to Monocyte
  c(9, 28),  # Fibroblast_36 to T naive CD4
  c(6, 2)    # Fibroblast_20 to Endothel
)

for (pair in bubble_pairs) {
  # Inside a loop the plot must be printed explicitly. Each call also emits the
  # message "Comparing communications on a single object".
  print(
    netVisual_bubble(
      cellchat.t,
      sources.use = pair[1],
      targets.use = pair[2],
      thresh = 0.05
    )
  )
}

CELLCHAT tumor pathways

# cellchat.t@netP$pathways
# cellchat.t@meta$DE = factor(cellchat.t@meta$DE, levels = c ("B cell", "Endothel", "Endothel_vasc._31", "Erythrocyte", "Fibroblast_18", "Fibroblast_20", "Fibroblast_25", "Fibroblast_32", "Fibroblast_36", "T gd", "Keratinocyte_26", "Mast_27", "Monocyte", "Monocyte_23", "Neurons_47", "Neutrophil", "NK", "NK T like", "Plasma_15", "Plasma_35", "Plasma_37", "Platelet", "Schwann_43", "T cytotox. CD8", "T fh", "T mem 1", "T mem 2", "T naive CD4", "T reg CD4", "T undef."))
# x <- "COLLAGEN"
# for(x in cellchat.t@netP$pathways){
#   p <- plotGeneExpression(cellchat.t, signaling = x)
#   ggsave(paste0(x, ".pdf"),plot=p, width=10, height=10)
#   }

CELLCHAT skin pathways

# cellchat.s@netP$pathways
# cellchat.s@meta$DE = factor(cellchat.s@meta$DE, levels = c ("B cell", "Endothel", "Endothel_vasc._31", "Erythrocyte", "Fibroblast_18", "Fibroblast_20", "Fibroblast_25", "Fibroblast_32", "Fibroblast_36", "T gd", "Keratinocyte_26", "Mast_27", "Monocyte", "Monocyte_23", "Neurons_47", "Neutrophil", "NK", "NK T like", "Plasma_15", "Plasma_35", "Plasma_37", "Platelet", "Schwann_43", "T cytotox. CD8", "T fh", "T mem 1", "T mem 2", "T naive CD4", "T reg CD4", "T undef."))
# y <- "COLLAGEN"
# for(y in cellchat.s@netP$pathways){
#   p <- plotGeneExpression(cellchat.s, signaling = y)
#   ggsave(paste0(y, ".pdf"),plot=p, width=10, height=10)
# }

Number of interactions per source cell (ligand) in tumor and skin

# Number of communication rows per source cell type in the tumor.
# NOTE(review): thresh = NA is passed as in the rest of this file; presumably
# it disables the p-value filter of subsetCommunication() - confirm.
df.t <- cellchat.t %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::count(source, name = "Number_of_Interactions") %>%
  dplyr::mutate(Type = "Tumor")

# Same for the skin; the counts are negated so the skin bars point downwards
# in the mirrored bar chart.
df.s <- cellchat.s %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::count(source, name = "Number_of_Interactions") %>%
  dplyr::mutate(
    Number_of_Interactions = -Number_of_Interactions,
    Type = "Skin"
  )

combined_data <- bind_rows(df.t, df.s)

# Plot tumor + skin: tumor bars above zero, skin bars below zero.
combined_plot <- ggplot(
  combined_data,
  aes(x = source, y = Number_of_Interactions, fill = Type)
) +
  geom_col() +
  labs(
    title = "Number of interactions per source cell",
    y = "Number of Interactions",
    x = "Source Cell"
  ) +
  scale_fill_manual(
    values = c(
      "Skin" = rgb(33/255, 102/255, 172/255),
      "Tumor" = rgb(178/255, 24/255, 43/255)
    )
  ) +
  theme_minimal() +
  # hjust = 1 right-aligns the rotated labels against the axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(combined_plot)

Number of interactions per source cell (ligand) in tumor - skin

# Communication rows per source cell type, one table per condition.
df.t <- cellchat.t %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::count(source, name = "Tumor_Interactions")
df.s <- cellchat.s %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::count(source, name = "Skin_Interactions")

# Full join on 'source' keeps cell types that occur in only one condition
# (T naive CD4 has no skin rows). Their missing count is replaced by 0, then
# the difference is computed as tumor minus skin.
combined_data.diff <- full_join(df.t, df.s, by = "source") %>%
  mutate(
    Tumor_Interactions = replace(Tumor_Interactions, is.na(Tumor_Interactions), 0),
    Skin_Interactions = replace(Skin_Interactions, is.na(Skin_Interactions), 0),
    Interaction_Difference = Tumor_Interactions - Skin_Interactions
  )

# Display the whole table (screenshot saved).
print(combined_data.diff, n = 28)
## # A tibble: 28 × 4
##    source            Tumor_Interactions Skin_Interactions Interaction_Difference
##    <fct>                          <dbl>             <dbl>                  <dbl>
##  1 B cell                           386               456                    -70
##  2 Endothel                         916               638                    278
##  3 Endothel_vasc._31               1111               860                    251
##  4 Erythrocyte                       90                92                     -2
##  5 Fibroblast_18                   1189              1100                     89
##  6 Fibroblast_20                   1022               813                    209
##  7 Fibroblast_25                    566               458                    108
##  8 Fibroblast_32                   1340              1002                    338
##  9 Fibroblast_36                   1151              1241                    -90
## 10 T gd                             271               300                    -29
## 11 Keratinocyte_26                  567               615                    -48
## 12 Mast_27                          158               127                     31
## 13 Monocyte                         477               352                    125
## 14 Monocyte_23                      287               175                    112
## 15 Neurons_47                        25               174                   -149
## 16 Neutrophil                       108               157                    -49
## 17 NK                               248               346                    -98
## 18 NK T like                        309               242                     67
## 19 Plasma_15                        314               329                    -15
## 20 Plasma_35                         64               165                   -101
## 21 Plasma_37                        294               313                    -19
## 22 Schwann_43                       839               803                     36
## 23 T cytotox. CD8                   281               329                    -48
## 24 T fh                             568               504                     64
## 25 T mem 1                          219               266                    -47
## 26 T mem 2                          259               309                    -50
## 27 T naive CD4                      213                 0                    213
## 28 T reg CD4                        238               289                    -51
# Bar chart of the tumor-minus-skin difference per source cell type.
# The fill is mapped to the sign of the difference: positive bars are labelled
# "Tumor" (red); all other bars are labelled "Skin" (blue).
difference_plot <- ggplot(
  combined_data.diff,
  aes(
    x = source,
    y = Interaction_Difference,
    fill = Interaction_Difference > 0
  )
) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c(
      "TRUE" = rgb(178/255, 24/255, 43/255),
      "FALSE" = rgb(33/255, 102/255, 172/255)
    ),
    labels = c("TRUE" = "Tumor", "FALSE" = "Skin")
  ) +
  labs(
    title = "Difference in interactions per source cell (Tumor - Skin)",
    y = "Difference in Interactions",
    x = "Source Cell"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

print(difference_plot)

Scatter plot source

# Scatter plot of tumor vs. skin interaction counts, one labelled point per
# source cell type, with a linear fit and its confidence band.
# - `label` is mapped only in geom_text(); as a plot-wide aesthetic it was
#   dropped by geom_smooth() with a warning.
# - theme_minimal() is a complete theme and resets earlier theme() settings,
#   so legend.position = "none" has to be applied after it.
# - The smoothing formula is given explicitly to avoid the informational note.
scatter_plot <- ggplot(
  combined_data.diff,
  aes(x = Tumor_Interactions, y = Skin_Interactions)
) +
  geom_point(aes(color = source), size = 3) +
  geom_text(aes(label = source), size = 3) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  labs(
    title = "Number of Interactions per source Cell",
    x = "Tumor Interactions",
    y = "Skin Interactions"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

print(scatter_plot)

Number of interactions per target cell (receptor) in tumor and skin

# Number of communication rows per target cell type in the tumor.
df.t <- cellchat.t %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::count(target, name = "Number_of_Interactions") %>%
  dplyr::mutate(Type = "Tumor")

# Same for the skin; the counts are negated so the skin bars point downwards.
df.s <- cellchat.s %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::count(target, name = "Number_of_Interactions") %>%
  dplyr::mutate(
    Number_of_Interactions = -Number_of_Interactions,
    Type = "Skin"
  )

combined_data <- bind_rows(df.t, df.s)

# Plot tumor + skin: tumor bars above zero, skin bars below zero.
combined_plot <- ggplot(
  combined_data,
  aes(x = target, y = Number_of_Interactions, fill = Type)
) +
  geom_col(position = "stack") +
  labs(
    title = "Number of interactions per target cell",
    y = "Number of Interactions",
    x = "Target Cell"
  ) +
  scale_fill_manual(
    values = c(
      "Skin" = rgb(33/255, 102/255, 172/255),
      "Tumor" = rgb(178/255, 24/255, 43/255)
    )
  ) +
  theme_minimal() +
  # hjust = 1 right-aligns the rotated labels against the axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(combined_plot)

Number of interactions per target cell (receptor) in tumor - skin

# Communication rows per target cell type, one table per condition.
df.t <- cellchat.t %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::count(target, name = "Tumor_Interactions")
df.s <- cellchat.s %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::count(target, name = "Skin_Interactions")

# Full join on 'target' keeps cell types that occur in only one condition
# (T naive CD4 has no skin rows). Their missing count is replaced by 0, then
# the difference is computed as tumor minus skin.
combined_data.diff <- full_join(df.t, df.s, by = "target") %>%
  mutate(
    Tumor_Interactions = replace(Tumor_Interactions, is.na(Tumor_Interactions), 0),
    Skin_Interactions = replace(Skin_Interactions, is.na(Skin_Interactions), 0),
    Interaction_Difference = Tumor_Interactions - Skin_Interactions
  )

# Display the whole table (screenshot saved).
print(combined_data.diff, n = 27)
## # A tibble: 27 × 4
##    target            Tumor_Interactions Skin_Interactions Interaction_Difference
##    <fct>                          <dbl>             <dbl>                  <dbl>
##  1 B cell                           299               333                    -34
##  2 Endothel                         591               291                    300
##  3 Endothel_vasc._31                927               929                     -2
##  4 Fibroblast_18                    775               659                    116
##  5 Fibroblast_20                    565               428                    137
##  6 Fibroblast_25                    190               152                     38
##  7 Fibroblast_32                    865               575                    290
##  8 Fibroblast_36                    721               976                   -255
##  9 T gd                             332               505                   -173
## 10 Keratinocyte_26                  616               705                    -89
## 11 Mast_27                          302               183                    119
## 12 Monocyte                         841               569                    272
## 13 Monocyte_23                      195               196                     -1
## 14 Neurons_47                        25               162                   -137
## 15 Neutrophil                       278               257                     21
## 16 NK                               554               691                   -137
## 17 NK T like                        239               282                    -43
## 18 Plasma_15                        520               542                    -22
## 19 Plasma_35                        284               168                    116
## 20 Plasma_37                        501               535                    -34
## 21 Schwann_43                       691               595                     96
## 22 T cytotox. CD8                   612               588                     24
## 23 T fh                             767               867                   -100
## 24 T mem 1                          423               339                     84
## 25 T mem 2                          450               448                      2
## 26 T naive CD4                      481                 0                    481
## 27 T reg CD4                        466               480                    -14
# Bar chart of the tumor-minus-skin difference per target cell type.
# The fill is mapped to the sign of the difference: positive bars are labelled
# "Tumor" (red); all other bars are labelled "Skin" (blue).
difference_plot <- ggplot(
  combined_data.diff,
  aes(
    x = target,
    y = Interaction_Difference,
    fill = Interaction_Difference > 0
  )
) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c(
      "TRUE" = rgb(178/255, 24/255, 43/255),
      "FALSE" = rgb(33/255, 102/255, 172/255)
    ),
    labels = c("TRUE" = "Tumor", "FALSE" = "Skin")
  ) +
  labs(
    title = "Difference in interactions per target cell (Tumor - Skin)",
    y = "Difference in Interactions",
    x = "Target Cell"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

print(difference_plot)

Scatter plot target

# Scatter plot of tumor vs. skin interaction counts, one labelled point per
# target cell type, with a linear fit and its confidence band.
# - `label` is mapped only in geom_text(); as a plot-wide aesthetic it was
#   dropped by geom_smooth() with a warning.
# - theme_minimal() is a complete theme and resets earlier theme() settings,
#   so legend.position = "none" has to be applied after it.
# - The smoothing formula is given explicitly to avoid the informational note.
scatter_plot <- ggplot(
  combined_data.diff,
  aes(x = Tumor_Interactions, y = Skin_Interactions)
) +
  geom_point(aes(color = target), size = 3) +
  geom_text(aes(label = target), size = 3) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  labs(
    title = "Number of Interactions per target Cell",
    x = "Tumor Interactions",
    y = "Skin Interactions"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

print(scatter_plot)

Plotting top 10 interactions with biggest differences in tumor - skin

# Communication rows per (source, target) pair in each condition.
# .groups = "drop" returns ungrouped tables, so no grouping is carried into the
# join and the summarise() note is not emitted.
df.t <- cellchat.t %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::group_by(source, target) %>%
  dplyr::summarise(Tumor_Interactions = n(), .groups = "drop")
df.s <- cellchat.s %>%
  subsetCommunication(thresh = NA) %>%
  dplyr::as_tibble() %>%
  dplyr::group_by(source, target) %>%
  dplyr::summarise(Skin_Interactions = n(), .groups = "drop")

# Full join on 'source' and 'target'. A pair missing from one condition gets a
# count of 0, then the difference is computed as tumor minus skin.
combined_data.diff <- full_join(df.t, df.s, by = c("source", "target")) %>%
  dplyr::mutate(
    Tumor_Interactions = replace(Tumor_Interactions, is.na(Tumor_Interactions), 0),
    Skin_Interactions = replace(Skin_Interactions, is.na(Skin_Interactions), 0),
    Interaction_Difference = Tumor_Interactions - Skin_Interactions
  )

# Ten source-target pairs with the largest absolute difference.
top_10_diff <- combined_data.diff %>%
  dplyr::arrange(dplyr::desc(abs(Interaction_Difference))) %>%
  dplyr::slice_head(n = 10)

print(top_10_diff)
## # A tibble: 10 × 5
##    source     target Tumor_Interactions Skin_Interactions Interaction_Difference
##    <fct>      <fct>               <dbl>             <dbl>                  <dbl>
##  1 Fibroblas… Fibro…                 88                47                     41
##  2 Fibroblas… T nai…                 41                 0                     41
##  3 Endothel_… T nai…                 40                 0                     40
##  4 Fibroblas… Fibro…                 71               111                    -40
##  5 Fibroblas… Endot…                 59                21                     38
##  6 Endothel_… Fibro…                 80                43                     37
##  7 Fibroblas… T nai…                 37                 0                     37
##  8 Fibroblas… Monoc…                 79                43                     36
##  9 Fibroblas… T nai…                 35                 0                     35
## 10 Fibroblas… Endot…                 49                16                     33
# Paired t-test of tumor vs. skin interaction counts over the ten
# source-target pairs held in top_10_diff.
# NOTE(review): these ten pairs were selected because their absolute
# tumor-skin difference is the largest, so the test runs on a pre-selected
# sample. Interpret the p-value with that selection in mind, and consider
# testing all pairs in combined_data.diff instead.
result <- t.test(top_10_diff$Tumor_Interactions, top_10_diff$Skin_Interactions, paired=TRUE)
print(result)
## 
##  Paired t-test
## 
## data:  top_10_diff$Tumor_Interactions and top_10_diff$Skin_Interactions
## t = 3.8212, df = 9, p-value = 0.004082
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  12.1584 47.4416
## sample estimates:
## mean difference 
##            29.8
###p-value = 0.004082 => statistically significant difference between "Tumor_Interactions" and "Skin_Interactions"

# Bar chart of the ten largest count differences. Each bar is one
# "source-target" pair; positive bars are labelled "Tumor" (red), all other
# bars are labelled "Skin" (blue).
difference_plot <- ggplot(
  top_10_diff,
  aes(
    x = paste(source, target, sep = "-"),
    y = Interaction_Difference,
    fill = Interaction_Difference > 0
  )
) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c(
      "TRUE" = rgb(178/255, 24/255, 43/255),
      "FALSE" = rgb(33/255, 102/255, 172/255)
    ),
    labels = c("TRUE" = "Tumor", "FALSE" = "Skin")
  ) +
  labs(
    title = "Top 10 Differences in Interactions (Tumor - Skin)",
    y = "Difference in Interactions",
    x = "Interaction (Source-target)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

print(difference_plot)

Function to process interactions between source and target cell types

#' Plot tumor vs. skin communication probabilities for one cell-type pair
#'
#' Keeps the rows of `df` for the given source -> target pair, retains the
#' interactions with the largest absolute `prob.delta`, and draws the skin and
#' tumor probabilities as dodged bars per interaction.
#'
#' @param df Data frame with the columns `source`, `target`,
#'   `interaction_name_2`, `prob.t`, `prob.s`, `prob.delta`, `pval.t` and
#'   `pval.s`.
#' @param source_value Value of `source` to keep.
#' @param target_value Value of `target` to keep.
#' @param n_top Maximum number of interactions to keep, ranked by
#'   `abs(prob.delta)`; ties at the cut-off are kept. Defaults to 100.
#' @return A ggplot object.
plot_interaction <- function(df, source_value, target_value, n_top = 100) {
  # dplyr verbs are namespace-qualified because filter() and select() are
  # frequently masked by other attached packages.
  pair_rows <- df %>%
    dplyr::filter(source == source_value, target == target_value) %>%
    dplyr::slice_max(order_by = abs(prob.delta), n = n_top) %>%
    dplyr::select(-c(pval.t, pval.s))

  # Axis title built from the pair, e.g. "A to B".
  interaction_label <- paste(source_value, target_value, sep = " to ")

  # Long format: one row per (interaction, condition) so both probabilities
  # can share one fill scale.
  pair_rows %>%
    tidyr::pivot_longer(
      cols = c("prob.s", "prob.t"),
      names_to = "variable",
      values_to = "value"
    ) %>%
    ggplot(aes(x = interaction_name_2, y = value, fill = variable)) +
    geom_col(position = "dodge") +
    scale_fill_manual(
      values = c(
        "prob.s" = rgb(33/255, 102/255, 172/255),
        "prob.t" = rgb(178/255, 24/255, 43/255)
      )
    ) +
    labs(x = interaction_label, y = "Probability", fill = "Probability") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}

Plotting top 10 interactions based on abs(interactions difference)

# Probability bar charts for the ten source -> target pairs of interest.
# Every plot gets the minimal theme with right-aligned, rotated x labels.
interaction_pairs <- list(
  c("Fibroblast_32", "Fibroblast_32"),
  c("Fibroblast_32", "T naive CD4"),
  c("Endothel_vasc._31", "T naive CD4"),
  c("Fibroblast_36", "Fibroblast_36"),
  c("Fibroblast_32", "Endothel"),
  c("Endothel_vasc._31", "Fibroblast_32"),
  c("Fibroblast_18", "T naive CD4"),
  c("Fibroblast_32", "Monocyte"),
  c("Fibroblast_36", "T naive CD4"),
  c("Fibroblast_20", "Endothel")
)

for (pair in interaction_pairs) {
  # Inside a loop the plot must be printed explicitly.
  print(
    plot_interaction(merged_data, pair[1], pair[2]) +
      theme_minimal() +
      theme(axis.text.x = element_text(angle = 90, hjust = 1))
  )
}

Sorting interactions in tumor based on pval and prob

# Tumor communications whose p-value is exactly 0.
# The filter is applied row by row, so no grouping is needed; the result is an
# ungrouped table.
# NOTE(review): if the intent was "interactions whose p-value is 0 in every
# cell-type pair", this needs a grouped all(pval == 0) instead - confirm.
interactions_pval_zero.t <- cellchat.t.sC %>%
  dplyr::filter(pval == 0)

# Number of rows that make up the top 1% (rounded up).
top_1_percent_count <- ceiling(nrow(interactions_pval_zero.t) * 0.01)

# Rows with the highest probability; ties at the cut-off are kept, so slightly
# more than top_1_percent_count rows may be returned.
interactions_sorted_prob.t <- interactions_pval_zero.t %>%
  dplyr::slice_max(order_by = prob, n = top_1_percent_count)
  
# Tile plot of the top-1% rows only: one tile per source/target cell, coloured
# by interaction name ('source' and 'target' are discrete factors).
ggplot(
  data = interactions_sorted_prob.t,
  mapping = aes(x = source, y = target, label = interaction_name)
) +
  geom_tile(mapping = aes(fill = interaction_name), colour = "white") +
  theme_minimal() + # white background
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Top 1% Interactions in Tumor",
    x = "Source",
    y = "Target",
    fill = "Interactions"
  )

# Every tumor row (all cell-type pairs) of the interactions that made it into
# the top 1%, so one interaction can appear in many rows.
details_top_interactions_t <- filter(
  cellchat.t.sC,
  interaction_name %in% interactions_sorted_prob.t$interaction_name
)

# Same tile plot, drawn from the expanded table.
ggplot(
  data = details_top_interactions_t,
  mapping = aes(x = source, y = target, label = interaction_name)
) +
  geom_tile(mapping = aes(fill = interaction_name), colour = "white") +
  theme_minimal() + # white background
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Top 1% Interactions in Tumor",
    x = "Source",
    y = "Target",
    fill = "Interactions"
  )

Sorting interactions in skin based on pval and prob

# Skin communications whose p-value is exactly 0.
# The filter is applied row by row, so no grouping is needed; the result is an
# ungrouped table.
# NOTE(review): if the intent was "interactions whose p-value is 0 in every
# cell-type pair", this needs a grouped all(pval == 0) instead - confirm.
interactions_pval_zero.s <- cellchat.s.sC %>%
  dplyr::filter(pval == 0)

# Number of rows that make up the top 1% (rounded up).
top_1_percent_count <- ceiling(nrow(interactions_pval_zero.s) * 0.01)

# Rows with the highest probability; ties at the cut-off are kept, so slightly
# more than top_1_percent_count rows may be returned.
interactions_sorted_prob.s <- interactions_pval_zero.s %>%
  dplyr::slice_max(order_by = prob, n = top_1_percent_count)
  
# Tile plot of the top-1% rows only: one tile per source/target cell, coloured
# by interaction name ('source' and 'target' are discrete factors).
ggplot(
  data = interactions_sorted_prob.s,
  mapping = aes(x = source, y = target, label = interaction_name)
) +
  geom_tile(mapping = aes(fill = interaction_name), colour = "white") +
  theme_minimal() + # white background
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Top 1% Interactions in Skin",
    x = "Source",
    y = "Target",
    fill = "Interactions"
  )

# Every skin row (all cell-type pairs) of the interactions that made it into
# the top 1%, so one interaction can appear in many rows.
details_top_interactions_s <- filter(
  cellchat.s.sC,
  interaction_name %in% interactions_sorted_prob.s$interaction_name
)

# Same tile plot, drawn from the expanded table.
ggplot(
  data = details_top_interactions_s,
  mapping = aes(x = source, y = target, label = interaction_name)
) +
  geom_tile(mapping = aes(fill = interaction_name), colour = "white") +
  theme_minimal() + # white background
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Top 1% Interactions in Skin",
    x = "Source",
    y = "Target",
    fill = "Interactions"
  )

Top 5% Interactions with Biggest Difference in Probability (Tumor vs. Skin)

# Pair each tumor row with the skin row of the SAME source, target and
# interaction. Joining on 'interaction_name' alone would combine a tumor row
# with skin rows from unrelated cell-type pairs, so the probability difference
# would compare different things. The inputs are ungrouped first because they
# may be grouped tibbles.
interaction_keys <- c(
  "source", "target", "ligand", "receptor", "interaction_name",
  "interaction_name_2", "pathway_name", "annotation", "evidence"
)
merged_interactions <- dplyr::inner_join(
  dplyr::ungroup(dplyr::as_tibble(interactions_pval_zero.t)),
  dplyr::ungroup(dplyr::as_tibble(interactions_pval_zero.s)),
  by = interaction_keys,
  suffix = c(".t", ".s")
)

# Number of rows that make up the top 5% of the joined table (rounded up).
top_5_percent_count <- ceiling(nrow(merged_interactions) * 0.05)

# Signed probability difference (tumor minus skin) and the condition with the
# higher probability; keep the rows with the largest absolute difference.
merged_common_interactions <- merged_interactions %>%
  mutate(
    prob_diff = prob.t - prob.s,
    more_common_in = ifelse(prob_diff > 0, "Tumor", "Skin")
  ) %>%
  slice_max(order_by = abs(prob_diff), n = top_5_percent_count)

# Plotting: one tile per source/target cell, coloured by the condition with the
# higher probability and annotated with the interaction name.
ggplot(merged_common_interactions, aes(x = source, y = target)) +
  geom_tile(aes(fill = more_common_in), colour = "white") +
  geom_text(
    aes(label = interaction_name),
    size = 3,
    check_overlap = TRUE,
    vjust = "top"
  ) +
  scale_fill_manual(
    values = c(
      "Tumor" = rgb(178/255, 24/255, 43/255),
      "Skin" = rgb(33/255, 102/255, 172/255)
    )
  ) +
  labs(
    title = "Top 5% Interactions with Biggest Difference in Probability (Tumor vs. Skin)",
    x = "Source",
    y = "Target",
    fill = "More Common In"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    strip.background = element_blank(),
    strip.text = element_text(size = 12, face = "bold")
  )

Interaction based on cell types

# For each of the top 10 source-target pairs, draw a ligand x receptor heat map
# of the probability difference (prob.delta) and save it as a PDF.
for (i in seq_len(nrow(top_10_diff))) {
  # Compare as character: `==` between two factors errors when their level
  # sets differ, and the two tables are built independently.
  current_source <- as.character(top_10_diff$source[i])
  current_target <- as.character(top_10_diff$target[i])

  # Rows of merged_data for the current pair.
  current_data <- merged_data %>%
    dplyr::filter(
      as.character(source) == current_source,
      as.character(target) == current_target
    )

  # Nothing to draw (and no finite colour limits) without rows.
  if (nrow(current_data) == 0L) {
    message(
      "No ligand-receptor rows for ", current_source, " to ", current_target,
      "; skipping."
    )
    next
  }

  # Colour limits span the observed differences of this pair.
  delta_range <- range(current_data$prob.delta, na.rm = TRUE)

  # Plot
  p <- ggplot(current_data, aes(x = ligand, y = receptor, fill = prob.delta)) +
    geom_tile() +
    scale_fill_gradient2(
      low = rgb(33/255, 102/255, 172/255),
      high = rgb(178/255, 24/255, 43/255),
      mid = "grey",
      midpoint = 0,
      limits = delta_range,
      name = "Prob Delta"
    ) +
    labs(
      title = paste("Ligand-Receptor Interactions for", current_source, "to", current_target),
      x = "Ligand",
      y = "Receptor"
    ) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5),
      axis.text.y = element_text(hjust = 1)
    )

  print(p)

  # Save the plot
  file_name <- paste0("/Users/juliat/Desktop/bachelor/", current_source, "_to_", current_target, ".pdf")
  ggsave(file_name, plot = p, width = 10, height = 10)
}